Last Update: 2019-02-06 14:36:10

Libraries

Before we start, let’s load a few libraries.

# Clear every object from the current workspace before the analysis starts.
rm(list = ls())

# Fix the random-number seed used by the steps below.
set.seed(100)

# A negative `warn` value makes R ignore all warnings in the rendered output.
options(warn = -1)

library(knitr)
library(ggplot2)
library(caret)
library(doParallel)

# Leave one core free, but never request fewer than one worker:
# detectCores() returns 1 on a single-core machine (1 - 1 = 0 workers)
# and NA when the core count cannot be determined.
registerDoParallel(cores = max(1L, detectCores() - 1L, na.rm = TRUE))

We register all but one core so we can have a lot of parallelism when we start training our models.

Data Loading

Let’s read in our data.

# Read the four yearly CSV files from the data/ directory.
data.2015 <- read.csv(file = "data/2015.csv")
data.2016 <- read.csv(file = "data/2016.csv")
data.2017 <- read.csv(file = "data/2017.csv")
data.2018 <- read.csv(file = "data/2018.csv")

Now, we will only deal with regular season events. So let’s remove the playoffs from our datasets.

# Keep only the rows whose `isPlayoffGame` column equals 0.
#   data  data frame with an `isPlayoffGame` column
# Returns the matching rows with all columns; rows where the flag is NA
# are dropped.
get.regular.season <- function(data) {
  is.regular <- with(data, isPlayoffGame == 0)
  data[is.regular & !is.na(is.regular), , drop = FALSE]
}

# Drop playoff rows from each year's data.
season.2015 <- get.regular.season(data = data.2015)
season.2016 <- get.regular.season(data = data.2016)
season.2017 <- get.regular.season(data = data.2017)
season.2018 <- get.regular.season(data = data.2018)

Now let’s remove extraneous columns. At the end, we will have the following columns (I’ve changed their names for ease):

| Old Column Name   | New Column Name |
|-------------------|-----------------|
| xCordAdjusted     | x               |
| yCordAdjusted     | y               |
| shotAngleAdjusted | angle           |
| shotDistance      | dist            |
| teamCode          | team            |
| goal              | goal            |

# Build a slimmed-down data frame holding only the columns used in the
# analysis, under shorter names.
#   data  data frame with columns xCordAdjusted, yCordAdjusted,
#         shotAngleAdjusted, shotDistance, teamCode and goal
# Returns a data frame with columns x, y, angle, dist, team and goal.
get.helpful.data <- function(data) {
  data.frame(
    x = data[["xCordAdjusted"]],
    y = data[["yCordAdjusted"]],
    angle = data[["shotAngleAdjusted"]],
    dist = data[["shotDistance"]],
    team = data[["teamCode"]],
    goal = data[["goal"]]
  )
}

# Reduce each regular-season table to the renamed analysis columns.
analysis.2015 <- get.helpful.data(data = season.2015)
analysis.2016 <- get.helpful.data(data = season.2016)
analysis.2017 <- get.helpful.data(data = season.2017)
analysis.2018 <- get.helpful.data(data = season.2018)

Sometimes, there is incomplete data. Let’s just keep all the complete cases and remove the incomplete ones.

# Keep only rows with no missing value in any column.
analysis.2015 <- analysis.2015[complete.cases(analysis.2015), ]
analysis.2016 <- analysis.2016[complete.cases(analysis.2016), ]
analysis.2017 <- analysis.2017[complete.cases(analysis.2017), ]
analysis.2018 <- analysis.2018[complete.cases(analysis.2018), ]

# Stack 2017, then 2016, then 2015 into one table; 2018 stays separate.
analysis.all <- rbind(analysis.2017, rbind(analysis.2016, analysis.2015))
analysis.all <- analysis.all[complete.cases(analysis.all), ]

We’ll need a function to get team data.

# Return the rows of `data` whose `team` column equals `code`.
#   data  data frame with a `team` column
#   code  team code to match, e.g. "PIT"
# Returns the matching rows with all columns; rows whose team is NA are
# dropped. Stops with an error if `data` has no `team` column.
#
# Standard indexing is used instead of subset(): inside subset() the name
# `code` would be looked up in `data` first, so a data frame that happened to
# contain a `code` column would silently override the argument.
get.team.data <- function(data, code) {
  if (!"team" %in% names(data)) {
    stop("`data` must contain a `team` column", call. = FALSE)
  }
  is.team <- data[["team"]] == code
  data[!is.na(is.team) & is.team, , drop = FALSE]
}

Creating the Models

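With our data, we can start creating models. We’ll be creating two models with caret, a neural network (nnet) and a k-nearest neighbors model (knn), both tuned using 5-fold cross-validation repeated twice: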

# Resampling scheme: 5-fold cross-validation, repeated twice.
control <- trainControl(
  method = "repeatedcv",
  number = 5,
  repeats = 2
)

# Neural network with `goal` as the response; `team` is excluded from the
# predictors.
model.nnet <- train(
  goal ~ . - goal - team,
  data = analysis.all,
  method = "nnet",
  trControl = control
)
## # weights:  31
## initial  value 79308.617571 
## iter  10 value 21110.433003
## iter  20 value 19408.328843
## iter  30 value 18788.755236
## iter  40 value 18693.310231
## iter  50 value 18597.284636
## iter  60 value 18567.439924
## iter  70 value 18548.289994
## iter  80 value 18540.726000
## iter  90 value 18529.751641
## iter 100 value 18523.279147
## final  value 18523.279147 
## stopped after 100 iterations
# K-nearest neighbors with `goal` as the response; `team` is excluded from
# the predictors.
model.knn <- train(
  goal ~ . - goal - team,
  data = analysis.all,
  method = "knn",
  trControl = control
)

Extracting Predictions

Our predictions will come from analysis.2018. Here’s what a little bit of that data looks like:

# Preview the first few rows rather than printing every 2018 row.
head(analysis.2018)

Now, we can use the predict function to get our predictions.

# Neural network: predict on the 2018 data and attach the result as `predict`.
nnet.prediction <- predict(model.nnet, newdata = analysis.2018)
nnet.prediction.data <- data.frame(analysis.2018)
nnet.prediction.data[["predict"]] <- nnet.prediction

# K-nearest neighbors: same steps with the knn model.
knn.prediction <- predict(model.knn, newdata = analysis.2018)
knn.prediction.data <- data.frame(analysis.2018)
knn.prediction.data[["predict"]] <- knn.prediction

So, our Neural Network data looks like:

# Print the 2018 data together with the neural network's `predict` column.
nnet.prediction.data

Our K-Nearest Neighbors data looks like:

# Print the 2018 data together with the knn model's `predict` column.
knn.prediction.data

Visualizing the Predictions

With our predictions, let’s view how they differ. To make our lives easier, here are a few plotting functions.

# Shared builder for the four plotting functions below: a hex-binned plot
# drawn with a minimal theme.
#   data       data frame holding the columns referenced by `mapping`
#   mapping    aes() mapping passed to geom_hex()
#   title      plot title
#   x.label    x-axis label
#   primary    fill colour of the hexagons
#   secondary  outline colour of the hexagons
# Returns the ggplot object.
make.prediction.plot <- function(data, mapping, title, x.label,
                                 primary, secondary) {
  ggplot(data) +
    geom_hex(mapping, fill = primary, color = secondary) +
    labs(title = title, x = x.label, y = "Probability of Scoring") +
    theme_minimal()
}

# KNN predictions (`predict`) against shot distance (`dist`); hexagon opacity
# follows the number of rows in each bin. `team` is the label that starts
# the plot title.
make.knn.plot.dist <- function(data, primary, secondary, team) {
  make.prediction.plot(
    data,
    aes(x = dist, y = predict, alpha = ..count..),
    title = paste(team, "Predicted Goal Probability from KNN Model", sep = " "),
    x.label = "Distance from Net",
    primary = primary,
    secondary = secondary
  )
}

# KNN predictions (`predict`) against shot angle (`angle`).
make.knn.plot.angle <- function(data, primary, secondary, team) {
  make.prediction.plot(
    data,
    aes(x = angle, y = predict, alpha = ..count..),
    title = paste(team, "Predicted Goal Probability from KNN Model", sep = " "),
    x.label = "Angle of Shot on Net",
    primary = primary,
    secondary = secondary
  )
}

# Neural-network predictions (`predict`) against shot distance (`dist`).
make.nnet.plot.dist <- function(data, primary, secondary, team) {
  make.prediction.plot(
    data,
    aes(x = dist, y = predict, alpha = ..count..),
    title = paste(team, "Predicted Goal Probability from NNet Model", sep = " "),
    x.label = "Distance from Net",
    primary = primary,
    secondary = secondary
  )
}

# Neural-network predictions (`predict`) against shot angle (`angle`).
make.nnet.plot.angle <- function(data, primary, secondary, team) {
  make.prediction.plot(
    data,
    aes(x = angle, y = predict, alpha = ..count..),
    title = paste(team, "Predicted Goal Probability from NNet Model", sep = " "),
    x.label = "Angle of Shot on Net",
    primary = primary,
    secondary = secondary
  )
}

Let’s take a look at the NHL in its entirety.

# League-wide neural-network plots (all 2018 predictions).
plot.nnet.dist <- make.nnet.plot.dist(
  data = nnet.prediction.data,
  primary = "orange",
  secondary = "black",
  team = "NHL"
)
plot.nnet.angle <- make.nnet.plot.angle(
  data = nnet.prediction.data,
  primary = "orange",
  secondary = "black",
  team = "NHL"
)

# League-wide knn plots.
plot.knn.dist <- make.knn.plot.dist(
  data = knn.prediction.data,
  primary = "orange",
  secondary = "black",
  team = "NHL"
)
plot.knn.angle <- make.knn.plot.angle(
  data = knn.prediction.data,
  primary = "orange",
  secondary = "black",
  team = "NHL"
)

Here is our neural net model:

# Show the league-wide neural-network plots: distance first, then angle.
plot.nnet.dist

plot.nnet.angle

Here is our knn model:

# Show the league-wide knn plots: distance first, then angle.
plot.knn.dist

plot.knn.angle

Analysis

Pittsburgh Penguins

Let’s first get their data.

# Rows with team code "PIT" from each model's prediction table.
pit.nnet <- get.team.data(data = nnet.prediction.data, code = "PIT")
pit.knn <- get.team.data(data = knn.prediction.data, code = "PIT")

Now, let’s see how the Penguins fared in our models.

# Pittsburgh neural-network plots.
pit.plot.nnet.dist <- make.nnet.plot.dist(
  data = pit.nnet,
  primary = "#000000",
  secondary = "#FCB514",
  team = "Pittsburgh"
)
pit.plot.nnet.angle <- make.nnet.plot.angle(
  data = pit.nnet,
  primary = "#000000",
  secondary = "#FCB514",
  team = "Pittsburgh"
)

# Pittsburgh knn plots.
pit.plot.knn.dist <- make.knn.plot.dist(
  data = pit.knn,
  primary = "#000000",
  secondary = "#FCB514",
  team = "Pittsburgh"
)
pit.plot.knn.angle <- make.knn.plot.angle(
  data = pit.knn,
  primary = "#000000",
  secondary = "#FCB514",
  team = "Pittsburgh"
)

Here are the neural network plots:

# Show the Pittsburgh neural-network plots: distance first, then angle.
pit.plot.nnet.dist

pit.plot.nnet.angle

Here are the k-nearest neighbors plots:

# Show the Pittsburgh knn plots: distance first, then angle.
pit.plot.knn.dist

pit.plot.knn.angle

Boston Bruins

Let’s first get their data.

# Rows with team code "BOS" from each model's prediction table.
bos.nnet <- get.team.data(data = nnet.prediction.data, code = "BOS")
bos.knn <- get.team.data(data = knn.prediction.data, code = "BOS")

Now, let’s see how the Bruins fared in our models.

# Boston neural-network plots.
bos.plot.nnet.dist <- make.nnet.plot.dist(
  data = bos.nnet,
  primary = "#FFB81C",
  secondary = "#000000",
  team = "Boston"
)
bos.plot.nnet.angle <- make.nnet.plot.angle(
  data = bos.nnet,
  primary = "#FFB81C",
  secondary = "#000000",
  team = "Boston"
)

# Boston knn plots.
bos.plot.knn.dist <- make.knn.plot.dist(
  data = bos.knn,
  primary = "#FFB81C",
  secondary = "#000000",
  team = "Boston"
)
bos.plot.knn.angle <- make.knn.plot.angle(
  data = bos.knn,
  primary = "#FFB81C",
  secondary = "#000000",
  team = "Boston"
)

Here are the neural network plots:

# Show the Boston neural-network plots: distance first, then angle.
bos.plot.nnet.dist

bos.plot.nnet.angle

Here are the k-nearest neighbors plots:

# Show the Boston knn plots: distance first, then angle.
bos.plot.knn.dist

bos.plot.knn.angle

Tampa Bay Lightning

Let’s first get their data.

# Rows with team code "T.B" from each model's prediction table.
tbl.nnet <- get.team.data(data = nnet.prediction.data, code = "T.B")
tbl.knn <- get.team.data(data = knn.prediction.data, code = "T.B")

Now, let’s see how the Lightning fared in our models.

# Tampa Bay neural-network plots.
tbl.plot.nnet.dist <- make.nnet.plot.dist(
  data = tbl.nnet,
  primary = "#002868",
  secondary = "#FFFFFF",
  team = "Tampa Bay"
)
tbl.plot.nnet.angle <- make.nnet.plot.angle(
  data = tbl.nnet,
  primary = "#002868",
  secondary = "#FFFFFF",
  team = "Tampa Bay"
)

# Tampa Bay knn plots.
tbl.plot.knn.dist <- make.knn.plot.dist(
  data = tbl.knn,
  primary = "#002868",
  secondary = "#FFFFFF",
  team = "Tampa Bay"
)
tbl.plot.knn.angle <- make.knn.plot.angle(
  data = tbl.knn,
  primary = "#002868",
  secondary = "#FFFFFF",
  team = "Tampa Bay"
)

Here are the neural network plots:

# Show the Tampa Bay neural-network plots: distance first, then angle.
tbl.plot.nnet.dist

tbl.plot.nnet.angle

Here are the k-nearest neighbors plots:

# Show the Tampa Bay knn plots: distance first, then angle.
tbl.plot.knn.dist

tbl.plot.knn.angle

San Jose Sharks

Let’s first get their data.

# Rows with team code "S.J" from each model's prediction table.
sjs.nnet <- get.team.data(data = nnet.prediction.data, code = "S.J")
sjs.knn <- get.team.data(data = knn.prediction.data, code = "S.J")

Now, let’s see how the Sharks fared in our models.

# San Jose neural-network plots.
sjs.plot.nnet.dist <- make.nnet.plot.dist(
  data = sjs.nnet,
  primary = "#006D75",
  secondary = "#EA7200",
  team = "San Jose"
)
sjs.plot.nnet.angle <- make.nnet.plot.angle(
  data = sjs.nnet,
  primary = "#006D75",
  secondary = "#EA7200",
  team = "San Jose"
)

# San Jose knn plots.
sjs.plot.knn.dist <- make.knn.plot.dist(
  data = sjs.knn,
  primary = "#006D75",
  secondary = "#EA7200",
  team = "San Jose"
)
sjs.plot.knn.angle <- make.knn.plot.angle(
  data = sjs.knn,
  primary = "#006D75",
  secondary = "#EA7200",
  team = "San Jose"
)

Here are the neural network plots:

# Show the San Jose neural-network plots: distance first, then angle.
sjs.plot.nnet.dist

sjs.plot.nnet.angle

Here are the k-nearest neighbors plots:

# Show the San Jose knn plots: distance first, then angle.
sjs.plot.knn.dist

sjs.plot.knn.angle

Nashville Predators

Let’s first get their data.

# Rows with team code "NSH" from each model's prediction table.
nsh.nnet <- get.team.data(data = nnet.prediction.data, code = "NSH")
nsh.knn <- get.team.data(data = knn.prediction.data, code = "NSH")

Now, let’s see how the Predators fared in our models.

# Nashville neural-network plots.
nsh.plot.nnet.dist <- make.nnet.plot.dist(
  data = nsh.nnet,
  primary = "#FFB81C",
  secondary = "#041E42",
  team = "Nashville"
)
nsh.plot.nnet.angle <- make.nnet.plot.angle(
  data = nsh.nnet,
  primary = "#FFB81C",
  secondary = "#041E42",
  team = "Nashville"
)

# Nashville knn plots.
nsh.plot.knn.dist <- make.knn.plot.dist(
  data = nsh.knn,
  primary = "#FFB81C",
  secondary = "#041E42",
  team = "Nashville"
)
nsh.plot.knn.angle <- make.knn.plot.angle(
  data = nsh.knn,
  primary = "#FFB81C",
  secondary = "#041E42",
  team = "Nashville"
)

Here are the neural network plots:

# Show the Nashville neural-network plots: distance first, then angle.
nsh.plot.nnet.dist

nsh.plot.nnet.angle

Here are the k-nearest neighbors plots:

# Show the Nashville knn plots: distance first, then angle.
nsh.plot.knn.dist

nsh.plot.knn.angle

Los Angeles Kings

Let’s first get their data.

# Rows with team code "L.A" from each model's prediction table.
lak.nnet <- get.team.data(data = nnet.prediction.data, code = "L.A")
lak.knn <- get.team.data(data = knn.prediction.data, code = "L.A")

Now, let’s see how the Kings fared in our models.

# Los Angeles neural-network plots.
lak.plot.nnet.dist <- make.nnet.plot.dist(
  data = lak.nnet,
  primary = "#111111",
  secondary = "#A2AAAD",
  team = "Los Angeles"
)
lak.plot.nnet.angle <- make.nnet.plot.angle(
  data = lak.nnet,
  primary = "#111111",
  secondary = "#A2AAAD",
  team = "Los Angeles"
)

# Los Angeles knn plots.
lak.plot.knn.dist <- make.knn.plot.dist(
  data = lak.knn,
  primary = "#111111",
  secondary = "#A2AAAD",
  team = "Los Angeles"
)
lak.plot.knn.angle <- make.knn.plot.angle(
  data = lak.knn,
  primary = "#111111",
  secondary = "#A2AAAD",
  team = "Los Angeles"
)

Here are the neural network plots:

# Show the Los Angeles neural-network plots: distance first, then angle.
lak.plot.nnet.dist

lak.plot.nnet.angle

Here are the k-nearest neighbors plots:

# Show the Los Angeles knn plots: distance first, then angle.
lak.plot.knn.dist

lak.plot.knn.angle